## A Dynamic model of endogenous fishing duration
# Main code

## ----library -----

pacman::p_load(tidyverse,
               lubridate, 
               ggthemes,
               MASS, 
               stargazer,    # generate regression table
               foreign,      # for Stata data
               glmnet,       # Lasso estimation
               psych,        # for summary statistics
               knitr,        # kable
               kableExtra,   # fancy table
               Benchmarking, # DEA analysis
               frontier,     # SFA analysis
               estimatr,     # Robust Estimator
               patchwork,    # easy layout of ggplots
               extrafont,    # ggplot fonts
               huxtable,     # use huxtable
               recipes)      # to use discretize 

## ---- Define functions ------------

'%notin%' <- Negate('%in%')

## ---- Data Load -----------------------------------

# generated in data_man.R
dat3 = read_csv("../../Data/daily_dat3.csv",
                locale = locale(encoding = "UTF-8"))



## ----1st_step_ccp, cache=TRUE, include=FALSE,eval = TRUE---------------
# ==== First Stage: CCP, lasso logit ========================

# --------- LASSO logit ----------------
# using package -glmnet-
# For whole sample

# --- Estimation with number ---
dat.temp_LS = with(dat3, 
                   data.frame(choice,
                              
                              exp_SF_price,
                              exp_BS_price,
                              days_past,
                              days_past^2,
                              ope_days_past,
                              ope_days_past^2,
                              sword_num,
                              bshark_num,
                              sword_unit_wt_interpo,
                              bshark_unit_wt_interpo,
                              
                              # unit weight
                              exp_SF_price*sword_unit_wt_interpo,
                              exp_BS_price*bshark_unit_wt_interpo,
                              
                              # SF cumulative Number 
                              sword_num_cum,
                              exp_SF_price*sword_num_cum,
                              days_past*sword_num_cum,
                              days_past^2*sword_num_cum,
                              ope_days_past*sword_num_cum,
                              ope_days_past^2*sword_num_cum,
                              
                              # BS cumulative number
                              bshark_num_cum,
                              exp_BS_price*bshark_num_cum,
                              days_past*bshark_num_cum,
                              days_past^2*bshark_num_cum,
                              ope_days_past*bshark_num_cum,
                              ope_days_past^2*bshark_num_cum,
                              
                              # Oil Prices
                              oil_price,
                              oil_price*exp_BS_price,
                              oil_price*exp_SF_price,
                              oil_price*days_past,
                              oil_price*days_past^2,
                              oil_price*ope_days_past,
                              oil_price*ope_days_past^2,
                              
                              # Total cost
                              cum_fuelcost,
                              cum_rev,
                              cum_profit,
                              
                              # Elapsed days
                              dat3[,names_days_SF_num],  # days*# of SF num s days ago
                              dat3[,names_days_BS_num] # days*# of BS wt s days ago
                              
                   ))

# --- Estimation with number of fish ---
dat.temp_LS.15 = data.frame(dat3$ope_num,dat.temp_LS) %>%
  dplyr::filter(dat3.ope_num >= 15) %>%
  dplyr::select(-dat3.ope_num) %>%
  dplyr::filter(complete.cases(.))

y.15 = as.vector(dat.temp_LS.15$choice)
x.15 = as.matrix(dat.temp_LS.15[,-1])
x.15.naive = as.matrix(dat.temp_LS.15[,-c(1,36:ncol(dat.temp_LS.15))])  # REFEREE



# Cross Validation: 10-fold
##time = proc.time()
set.seed(3)
# Use cycle because LASSO results varies by randomness of choice of k-folds
# see: https://stats.stackexchange.com/questions/97777/variablity-in-cv-glmnet-results
# cycle for doing 100 cross validations
# and take the average of the mean error curves
# initialize vector for final data.frame with Mean Standard Errors
##MSEs.15 <- NULL

time = proc.time()


cv.15 = cv.glmnet(x.15,y.15,
                  family = "binomial",
                  type.measure = "class",
                  nfolds = 10,
                  alpha = 1)
## MSEs.15 <- cbind(MSEs.15, cv.15$cvm)


lambda.min.15 = cv.15$lambda.min

proc.time() - time

lasso_logit.15 = glmnet(x.15,y.15, family = "binomial",
                        alpha = 1)



# Estimate discounting factor to check.
# We need prediction EV without interaction terms

cv.15.naive = cv.glmnet(x.15.naive,y.15,
                  family = "binomial",
                  type.measure = "class",
                  nfolds = 10,
                  alpha = 1)
## MSEs.15 <- cbind(MSEs.15, cv.15$cvm)


lambda.min.15.naive = cv.15.naive$lambda.min
lasso_logit.15.naive = glmnet(x.15.naive, 
                              y.15, family = "binomial",
                        alpha = 1)

# very naive model (no interactions)

## MSEs.15 <- cbind(MSEs.15, cv.15$cvm)





## ----1st_step_CCP_check ------------------

# separate training and test data
## length()
set.seed(15)
dat.temp_test.15 = data.frame(dat3$trip_id,dat3$ope_num,dat.temp_LS) %>%
  dplyr::filter(dat3.ope_num >= 15) %>%
  dplyr::select(-dat3.ope_num) %>%
  dplyr::filter(complete.cases(.))

tripids = unique(dat.temp_test.15$dat3.trip_id)
tripids_cv = sample(tripids,round(length(tripids)*0.7), replace = FALSE)  # 

dat.temp_test.15_tr = dat.temp_test.15 %>%
  filter(dat3.trip_id %in% tripids_cv)

dat.temp_test.15_te = dat.temp_test.15 %>%
  filter(dat3.trip_id %notin% tripids_cv)

# training data
y_tr.15 = as.vector(dat.temp_test.15_tr$choice)
x_tr.15 = as.matrix(dat.temp_test.15_tr[,-c(1:2)])
y_te.15 = as.vector(dat.temp_test.15_te$choice)
x_te.15 = as.matrix(dat.temp_test.15_te[,-c(1:2)])

# cross validation lasso logit
cv.15_tr = cv.glmnet(x_tr.15,y_tr.15,
                  family = "binomial",
                  type.measure = "class",
                  nfolds = 10,
                  alpha = 1)

glm_naive_tr = glm(choice~exp_SF_price+
      exp_BS_price +
      ope_days_past+
      sword_num+
      bshark_num+
      sword_unit_wt_interpo+
      bshark_unit_wt_interpo+
      sword_num_cum+
      bshark_num_cum+
      oil_price+
      cum_fuelcost+
      cum_rev+
      cum_profit, dat.temp_test.15_tr, family = "binomial")



pred_lam_min <- predict(cv.15_tr, newx = x_te.15, s = "lambda.min", type = "response")
pred_lam_1se <- predict(cv.15_tr, newx = x_te.15, s = "lambda.1se", type = "response")
pred_no_rest <- predict(cv.15_tr, newx = x_te.15, s = min(cv.15$lambda), type = "response")
glm_naive_pred <- predict(glm_naive_tr,newdata = dat.temp_test.15_te,type = "response")

RMSE_lam_min = sqrt(mean((y_te.15 -pred_lam_min)^2))
RMSE_lam_1se = sqrt(mean((y_te.15 -pred_lam_1se)^2))
RMSE_no_rest = sqrt(mean((y_te.15 -pred_no_rest)^2))
RMSE_naive_pred = sqrt(mean((y_te.15 -glm_naive_pred)^2))
RMSE_random = sqrt(mean((y_te.15 -mean(0.9131852))^2))


df_RMSE = data.frame(Model = c('Lasso Logit, $\\lambda_{min}$','Lasso Logit, $\\lambda_{1se}$',"Full Model","Simple Model","Mean dependent var."), `Brier Score` = c(RMSE_lam_min,RMSE_lam_1se,RMSE_no_rest,RMSE_naive_pred,RMSE_random))




## ---- 1st step state :days -------------------

# ======== First Step: state, days ========

# Data for estimating the state transitions
dat3.temp = dat3[dat3$choice == 1,]

# cumulative swordfish
glm_days_past5 = glm(I(days_past_next - days_past) ~ days_past + sword_num + sword_num_cum,
                     data = dat3.temp,
                     family = poisson(link = log))

# conditional distribution
dat3$exp_move = predict(glm_days_past5, newdata = dat3, type = "response")
dat3$days_lambda = dat3$exp_move


## ---- 1st step state: catch -----------

dat_temp0 = dat3 %>% 
  mutate(bshark_wt_disc = floor(bshark_wt/100)) %>%
    mutate(sword_num_disc10 = floor(sword_num/8.33)+1, #max(dat3$sword_num)/9 = 8.333
           bshark_num_disc10 = floor(bshark_num/133.555)+1,   #max(dat3$bshark_num)/9  
           bshark_wt_disc10 = floor(bshark_wt/1666.66)+1  #max(dat3$bshark_wt)/10 = 1666.667 
           ) %>%
  arrange(trip_id,ope_num) %>%  # "order changed here"
  group_by(vname,trip_id) %>%
  mutate(sword_diff = lead(sword_num,1) - sword_num,
         sword_num_next = lead(sword_num,1),
         sword_num_disc10_next = lead(sword_num_disc10,1),
         bshark_num_diff = lead(bshark_num,1) - bshark_num,
         bshark_num_next = lead(bshark_num,1),
        # bshark_num_disc_next = lead(bshark_num_disc,1),
         bshark_num_disc10_next = lead(bshark_num_disc10,1),
         bshark_wt_diff = lead(bshark_wt,1) - bshark_wt,
         bshark_wt_next = lead(bshark_wt,1),
         bshark_wt_disc_next = lead(bshark_wt_disc,1),
         bshark_wt_disc10_next = lead(bshark_wt_disc10,1)) %>%
  ungroup()



# Use GLM to obtain the distribution.
# Poisson for sword_num 
# Log normal for bshark_wt

# Discretized for 10 bins
sword10_glm = glm(sword_num_disc10_next ~ sword_num_disc10 + days_past, data = dat_temp0, family = poisson)

##summary(sword10_glm)

bshark10_glm = glm(bshark_num_disc10_next ~ bshark_num_disc10 + days_past, data = dat_temp0, family = poisson(link=log))

##summary(bshark10_glm)


# predict the lambda for each observation.
dat_temp0$sword_lambda = predict(sword10_glm, newdata = dat_temp0, type = "response")
dat_temp0$bshark_lambda = predict(bshark10_glm, newdata = dat_temp0, type = "response")

# observation x 1-10 matrix for probabilities by observation
  # define fucntion to make the lambda to be x, and x (integer quantiles) to be given parameters (1~10)
  dist_pois = function(x,k){
    dpois(k,x)
  }

#  Matrices of probability distribution by observation
mx_sword_dist_prob = t(sapply(dat_temp0$sword_lambda,dist_pois,k=1:10))
  mx_sword_dist_prob = mx_sword_dist_prob/rowSums(mx_sword_dist_prob) # to make the sum of 10 prob to 1
mx_bshark_dist_prob = t(sapply(dat_temp0$bshark_lambda,dist_pois, k = 1:10))
  mx_bshark_dist_prob = mx_bshark_dist_prob/rowSums(mx_sword_dist_prob)

# Matrix of prob dist by obs for days 
mx_days_dist_prob = t(sapply(dat_temp0$days_lambda, dist_pois, k = 1:7))
  mx_days_dist_prob = mx_days_dist_prob/rowSums(mx_days_dist_prob)
  


## ---- 1st step expected calculation ----------

# integration over the states

# Procedure
# make the data set "dat.pred" to predict the CCP based on the expected states.

dat_temp.15 = dat_temp0 %>% filter(ope_num >= 15)

mx_days_dist_prob.15 = mx_days_dist_prob[dat_temp0$ope_num >=15,]
##dim(mx_days_dist_prob.15)
mx_sword_dist_prob.15 = mx_sword_dist_prob[dat_temp0$ope_num >=15,]
##dim(mx_sword_dist_prob.15)
mx_bshark_dist_prob.15 = mx_bshark_dist_prob[dat_temp0$ope_num >=15,] 

Euler = -digamma(1) # Euler's constant

# store the result for each loop.
# array is 4 dimension. Number of observation x 10 category of sword_num x 10 cat of bshark_num x 7 days_past
array_integ = array(0, dim = c(nrow(dat_temp.15),10,10,7))
array_integ_naive = array(0, dim = c(nrow(dat_temp.15),10,10,7))

# loop to past_days
for(l in 1:7){
    # make all variables for integral
    dat_temp = dat_temp.15
    
    dat_temp$days_past = dat_temp$days_past + l  # each day
    dat_temp$days_past2 = (dat_temp$days_past)^2
    dat_temp$ope_days_past = dat_temp$ope_days_past + l
    # expected days since catch is: today + (expected days elapsed - days elapsed).
    # the second term is the expected additional calendar day
    exp_days_since_catch.15 = data.frame(rep(0,nrow(dat_temp)),dat_temp[,names_days]) + l

  # loop to make bshark_num_disc10
  for(j in 1:10){
    
    dat_temp$bshark_num = (j-1)*133.555 + 133.555/2  # take the median of each bin.

    # loop to make sword_num_disc10 loop
    
    for(i in 1:10){
     
     dat_temp2 = dat_temp # renew df for new i loop 
       
     dat_temp2$sword_num = (i-1)*8.33 + 8.33/2  # take the median of each bin.
     
     # The data frame of expected interaction of past catch and past days
     # 1 column delay: catch_1_ago is now catch_2_ago
      exp_SF_num_past.15 = data.frame(dat_temp2$sword_num,dat_temp2[,names_SF_num]) 
      ##exp_SF_wt_past.15 = data.frame(dat_temp2$exp_SF_wt,dat_temp2[,names_SF_wt]) 
      exp_BS_num_past.15 = data.frame(dat_temp2$bshark_num,dat_temp2[,names_BS_num]) 
      ##exp_BS_wt_past.15 = data.frame(dat_temp2$bshark_wt,dat_temp2[,names_BS_wt]) 


    # make days_elapsed*catch_s_days_ago interaction that is expected. 
      exp_days_SF_num.15 = exp_SF_num_past.15*exp_days_since_catch.15
      ##exp_days_SF_wt.15 = exp_SF_wt_past.15*exp_days_since_catch.15
      exp_days_BS_num.15 = exp_BS_num_past.15*exp_days_since_catch.15
      ##exp_days_BS_wt.15 = exp_BS_wt_past.15*exp_days_since_catch.15
      exp_days2_SF_num.15 = exp_SF_num_past.15*exp_days_since_catch.15^2
      ##exp_days2_SF_wt.15 = exp_SF_wt_past.15*exp_days_since_catch.15^2
      exp_days2_BS_num.15 = exp_BS_num_past.15*exp_days_since_catch.15^2
      ##exp_days2_BS_wt.15 = exp_BS_wt_past.15*exp_days_since_catch.15^2

    # change column name because days_catch_1 is now days_catch_2 in the one period later
      colnames(exp_days_SF_num.15) = paste("days_SF_num",1:36,"ago",sep ="_")
      ##colnames(exp_days_SF_wt.15) = paste("days_SF_wt",1:36,"ago",sep ="_")
      colnames(exp_days_BS_num.15) = paste("days_BS_num",1:36,"ago",sep ="_")
      ##colnames(exp_days_BS_wt.15) = paste("days_BS_wt",1:36,"ago",sep ="_")
      colnames(exp_days2_SF_num.15) = paste("days2_SF_num",1:36,"ago",sep ="_")
      ##colnames(exp_days2_SF_wt.15) = paste("days2_SF_wt",1:36,"ago",sep ="_")
      colnames(exp_days2_BS_num.15) = paste("days2_BS_num",1:36,"ago",sep ="_")
      ##colnames(exp_days2_BS_wt.15) = paste("days2_BS_wt",1:36,"ago",sep ="_")
     
      dat_temp2 = dat_temp2 %>%
        mutate(sword_num_cum = sword_num_cum + sword_num,
               ##sword_wt_cum = sword_wt_cum + exp_SF_wt/1000,
               bshark_num_cum = bshark_num_cum + bshark_num
               ##,bshark_wt_cum = bshark_wt_cum + bshark_wt/1000
               )
               
    # replace past catch-days interaction
      dat_temp2[,names_days_SF_num] = exp_days_SF_num.15[,names_days_SF_num]
      #dat_temp2[,names_days_SF_wt] = exp_days_SF_wt.15[,names_days_SF_wt]
      dat_temp2[,names_days_BS_num] = exp_days_BS_num.15[,names_days_BS_num]
     # dat_temp2[,names_days_BS_wt] = exp_days_BS_wt.15[,names_days_BS_wt]
      dat_temp2[,names_days2_SF_num] = exp_days2_SF_num.15[,names_days2_SF_num]
      #dat_temp2[,names_days2_SF_wt] = exp_days2_SF_wt.15[,names_days2_SF_wt]
      dat_temp2[,names_days2_BS_num] = exp_days2_BS_num.15[,names_days2_BS_num]
     # dat_temp2[,names_days2_BS_wt] = exp_days2_BS_wt.15[,names_days2_BS_wt]
      
      
      # make x for glmnet 
  dat_temp.pred = with(dat_temp2, 
                   data.frame(exp_SF_price,
                              exp_BS_price,
                              days_past,
                              days_past^2,
                              ope_days_past,
                              ope_days_past^2,
                              sword_num,
                              bshark_num,
                              sword_unit_wt_interpo,
                              bshark_unit_wt_interpo,
                              
                              # unit weight
                              exp_SF_price*sword_unit_wt_interpo,
                              exp_BS_price*bshark_unit_wt_interpo,
                              
                              # SF cumulative Number 
                              sword_num_cum,
                              exp_SF_price*sword_num_cum,
                              days_past*sword_num_cum,
                              days_past^2*sword_num_cum,
                              ope_days_past*sword_num_cum,
                              ope_days_past^2*sword_num_cum,
                              
                              
                              # BS cumulative number
                              bshark_num_cum,
                              exp_BS_price*bshark_num_cum,
                              days_past*bshark_num_cum,
                              days_past^2*bshark_num_cum,
                              ope_days_past*bshark_num_cum,
                              ope_days_past^2*bshark_num_cum,
                              
                              
                              # Oil Prices
                              oil_price,
                              oil_price*exp_BS_price,
                              oil_price*exp_SF_price,
                              oil_price*days_past,
                              oil_price*days_past^2,
                              oil_price*ope_days_past,
                              oil_price*ope_days_past^2
                              
                              # Total cost
                                ,cum_fuelcost,
                                cum_rev,
                                cum_profit
                              
                              ,dat_temp2[,names_days_SF_num],  # days*# of SF num s days ago
                              dat_temp2[,names_days_BS_num] # days*# of BS wt s days ago
                   ))

  # make x for glmnet for naive prediction (no interaction of days and catch timing)
  dat_temp.naive.pred = with(dat_temp2, 
                   data.frame(exp_SF_price,
                              exp_BS_price,
                              days_past,
                              days_past^2,
                              ope_days_past,
                              ope_days_past^2,
                              sword_num,
                              bshark_num,
                              sword_unit_wt_interpo,
                              bshark_unit_wt_interpo,
                              
                              # unit weight
                              exp_SF_price*sword_unit_wt_interpo,
                              exp_BS_price*bshark_unit_wt_interpo,
                              
                              # SF cumulative Number 
                              sword_num_cum,
                              exp_SF_price*sword_num_cum,
                              days_past*sword_num_cum,
                              days_past^2*sword_num_cum,
                              ope_days_past*sword_num_cum,
                              ope_days_past^2*sword_num_cum,
                              
                              # BS cumulative number
                              bshark_num_cum,
                              exp_BS_price*bshark_num_cum,
                              days_past*bshark_num_cum,
                              days_past^2*bshark_num_cum,
                              ope_days_past*bshark_num_cum,
                              ope_days_past^2*bshark_num_cum,
                              
                              # Oil Prices
                              oil_price,
                              oil_price*exp_BS_price,
                              oil_price*exp_SF_price,
                              oil_price*days_past,
                              oil_price*days_past^2,
                              oil_price*ope_days_past,
                              oil_price*ope_days_past^2
                              
                              
                              # Total cost
                               ,cum_fuelcost,
                               cum_rev,
                               cum_profit
                   ))


            # convert into matrix form because glmnet object accepts matrix
            nx.temp = as.matrix(dat_temp.pred)
            nx.naive.temp = as.matrix(dat_temp.naive.pred)
            
            dat_temp2$pred_ccp_lasso = predict(lasso_logit.15, newx = nx.temp, s = lambda.min.15, type = "response")
            dat_temp2$pred_ccp_lasso_naive = predict(lasso_logit.15.naive, newx = nx.naive.temp, s = lambda.min.15.naive, type = "response")
            dat_temp2$EV_lasso = (Euler - log(1 - dat_temp2$pred_ccp_lasso))
            dat_temp2$EV_lasso_naive= (Euler - log(1 - dat_temp2$pred_ccp_lasso_naive))
            
  
     # EV_Lasso x prob corresponding to the category i of sword x prob corr j of bshark x prob day l
     array_integ[,i,j,l] = as.numeric(dat_temp2$EV_lasso)*mx_sword_dist_prob.15[,i]*mx_bshark_dist_prob.15[,j]*mx_days_dist_prob.15[,l]
     array_integ_naive[,i,j,l] = as.numeric(dat_temp2$EV_lasso_naive)*mx_sword_dist_prob.15[,i]*mx_bshark_dist_prob.15[,j]*mx_days_dist_prob.15[,l]
     
    } # end of for i
    
  } # end of for j

} # end of for l



# ----- Integrate-----

dat_temp.15$EV_lasso_integ = rowSums(array_integ[,,,])
dat_temp.15$EV_lasso_naive_integ = rowSums(array_integ_naive[,,,])

# need this because orders are different from dat_temp0. search with "order changed here". 
dat3.15 = dat3 %>% filter(ope_num >= 15)

dat3.15 = dat3.15 %>%
  left_join(dat_temp.15 %>% dplyr::select(vname,trip_id,ope_num,EV_lasso_integ,EV_lasso_naive_integ), by = c("vname","trip_id","ope_num"))

##names(dat3.15)

# ===== Calculating the freshness terms ======

# freshness term (Linear)
dat3.15$decay_SF_num = rowSums((dat3.15[,names_days_SF_num]))
dat3.15$decay_SF_wt = rowSums((dat3.15[,names_days_SF_wt]/1000))
dat3.15$decay_BS_num = rowSums((dat3.15[,names_days_BS_num]))
dat3.15$decay_BS_wt = rowSums((dat3.15[,names_days_BS_wt]/1000))

# freshness term (quadratic)
dat3.15$decay2_SF_num = rowSums((dat3.15[,names_days2_SF_num]))
dat3.15$decay2_SF_wt = rowSums((dat3.15[,names_days2_SF_wt]/1000))
dat3.15$decay2_BS_num = rowSums((dat3.15[,names_days2_BS_num]))
dat3.15$decay2_BS_wt = rowSums((dat3.15[,names_days2_BS_wt]/1000))






## ----2nd step structural ---------------------------
# ===== Second Step: Main estimation =====

# baseline model (no freshness term)
logit_2step_num.15.base = glm(choice ~ 0 + cum_profit_interpo + sword_num_cum + bshark_num_cum +
                               offset(EV_lasso_naive_integ), data = dat3.15, family="binomial")

##summary(logit_2step_num.15.base)

# Model 7.2: cumulative profit, 1st degree included (to 2nd order)
logit_2step_num.15.7.2 = glm(choice ~ 0 + cum_profit_interpo + 
                               sword_num_cum +
                               decay_SF_num + 
                               bshark_num_cum +
                               decay_BS_num +
                               offset(EV_lasso_integ), data = dat3.15, family="binomial")


##summary(logit_2step_temp)

#MU of deterioration

MU_fun2 = function(x){ logit_2step_num.15.7.2$coefficients["sword_num_cum"]*mean(dat3.15$ope_num) +  logit_2step_num.15.7.2$coefficients["decay_SF_num"]*x
}



#money fun

money_fun2 = function(x) ( logit_2step_num.15.7.2$coefficients["sword_num_cum"]*x +  logit_2step_num.15.7.2$coefficients["decay_SF_num"]/2*x^2)/(-logit_2step_num.15.7.2$coefficients["cum_profit_interpo"])

##plot(money_fun2(1:40))
##curve(money_fun_temp(x),0,40)


# Model 7.3: cumulative profit, 1st degree inclued (to 3rd order)
logit_2step_num.15.7.3 = glm(choice ~ 0 + cum_profit_interpo +
                               sword_num_cum+
                               decay_SF_num + decay2_SF_num +
                               bshark_num_cum +
                               decay_BS_num + decay2_BS_num + 
                               offset(EV_lasso_integ), data = dat3.15, family="binomial")



money_fun3 = function(x) ( logit_2step_num.15.7.3$coefficients["sword_num_cum"]*x + logit_2step_num.15.7.3$coefficients["decay_SF_num"]/2*x^2 + logit_2step_num.15.7.3$coefficients["decay2_SF_num"]/3*x^3 )/(-logit_2step_num.15.7.3$coefficients["cum_profit_interpo"])



## ----2nd_step_str_result,include=FALSE---------------------------------

# This part is actually not necessary, because we do the same thing for the bootstrapped result. 

# Graph for freshness deterioration

# For model 7.2 with number
theta_22.7.2.15 = as.numeric(logit_2step_num.15.7.2$coefficients["decay_SF_num"]/2) 
theta_23.7.2.15 = 0 # 

theta_21.7.2.15 = as.numeric(logit_2step_num.15.7.2$coefficients["sword_num_cum"])

theta_1.7.2.15 = -as.numeric(logit_2step_num.15.7.2$coefficients["cum_profit_interpo"]) # negative, because the negative sign is expected by noramization



# Deterioration function, raw unit

fresh_fun2 = function(x) (theta_21.7.2.15*x + theta_22.7.2.15*x^2)

ggplot(data.frame(x=c(0, 40)), aes(x)) + 
  stat_function(fun=fresh_fun2) +
  xlab("Days since caught") +
  ylab("Freshness deterioration")

# deterioration function, money value

fresh_money_fun2 = function(x) (theta_21.7.2.15*x + theta_22.7.2.15*x^2)/(theta_1.7.2.15)

saveRDS(fresh_money_fun2,file ="fresh_money_fun2.rds")

ggplot(data.frame(x=c(0, 40)), aes(x)) + 
  stat_function(fun=fresh_money_fun2) +
  labs(x = "Days since caught",
       y= "Freshness deterioration (JPY/fish)",
       title = "Freshness value deterioration, second order")







# For model 7.3
theta_22.7.3.15 = as.numeric(logit_2step_num.15.7.3$coefficients["decay_SF_num"]/2)
theta_23.7.3.15 = as.numeric(logit_2step_num.15.7.3$coefficients["decay2_SF_num"]/3)
theta_21.7.3.15 = as.numeric(logit_2step_num.15.7.3$coefficients["sword_num_cum"])

theta_1.7.3.15 = -as.numeric(logit_2step_num.15.7.3$coefficients["cum_profit_interpo"]) # negative, because the negative sign is expected by noramization


# Deterioration function, raw unit, third order

fresh_fun3 = function(x) (theta_22.7.3.15*x^2 + theta_23.7.3.15*x^3)
ggplot(data.frame(x=c(0, 40)), aes(x)) + 
  stat_function(fun=fresh_fun3) +
  labs(x = "Days since caught",
       y= "Freshness deterioration (per fish)",
       title = "Freshness deterioration, third order")

# deterioration function, money value, third order

fresh_money_fun3 = function(x) (theta_21.7.3.15*x + theta_22.7.3.15*x^2 + theta_23.7.3.15*x^3)/(theta_1.7.3.15)

ggplot(data.frame(x=c(0, 40)), aes(x)) + 
  stat_function(fun=fresh_money_fun3) +
  labs(x = "Days since caught",
       y= "Freshness deterioration (JPY/fish)",
       title = "Freshness value deterioration, third order") + 
  theme(text = element_text(family = "Times New Roman"))



## ----bootstrap, eval=FALSE---------------------------------------------
## 
## # Bootstrap part is set as "eval = FALSE" because it takes long time to run.
## # Leave the original code in order to show the process, but when rendering the pdf, we use the saved file of the result in "bootstrap_result_load"
## 
## # ==== # Bootstrap =======================================
## 
## # time
## bttime = proc.time()
## 
## # Need bootstrap?
## # https://pdfs.semanticscholar.org/910c/117dd67f87c9afeae0c95f5af61a5aada2b3.pdf
## # Standard errors for fitted value in logit: https://stats.stackexchange.com/questions/66946/how-are-the-standard-errors-computed-for-the-fitted-values-from-a-logistic-regre
## 
## # ------ 0. Set the bootstrap paramter, B times, R samples
## B = 500
## nb = length(unique(dat3.15$trip_id)) # number of trips in the data
## 
## # boot strap is stratified by trips
## 
## set.seed(3)
## 
## # Storages for estimated coefficients
## boot_param_7.0 = matrix(nrow = B,ncol = length(logit_2step_num.15.base$coefficients))
## 
## ##boot_param_7.1 = matrix(nrow = B,ncol = 5)
## 
## boot_param_7.2 = matrix(nrow = B,ncol = length(logit_2step_num.15.7.2$coefficients))
## 
## boot_param_7.3 = matrix(nrow = B,ncol = length(logit_2step_num.15.7.3$coefficients))
## 
## 
## for(bb in 1:B){ # ** Start Loop **
##   # Random Draw: Obtain the sub-sample based on the random draw of trip ID
##   tripid_rdm = sample(unique(dat3.15$trip_id), nb, replace=T) # Obtain nb random trips
## 
##   dat3.boot = dat3.15 %>% filter(trip_id %in% tripid_rdm)
## 
## 
##   # ----- 1. Estimate CCP with the sampled data -----
##   dat.boot_LS = with(dat3.boot,
##                      data.frame(choice,
## 
##                               exp_SF_price,
##                               exp_BS_price,
##                               days_past,
##                               days_past^2,
##                               ope_days_past,
##                               ope_days_past^2,
##                               sword_num,
##                               ###sword_wt,
##                               bshark_num,
##                               ###bshark_wt,
##                               sword_unit_wt_interpo,
##                               bshark_unit_wt_interpo,
## 
##                               # unit weight
##                               ##sword_unit_wt_cum,
##                               ##bshark_unit_wt_cum,
##                               exp_SF_price*sword_unit_wt_interpo,
##                               exp_BS_price*bshark_unit_wt_interpo,
##                               ##exp_SF_price*sword_unit_wt_cum,
##                               ##exp_BS_price*bshark_unit_wt_cum,
## 
##                               # SF cumulative Number
##                               sword_num_cum,
##                               exp_SF_price*sword_num_cum,
##                               days_past*sword_num_cum,
##                               days_past^2*sword_num_cum,
##                               ope_days_past*sword_num_cum,
##                               ope_days_past^2*sword_num_cum,
## 
##                               # SF cumulative Weight
##                               ###sword_wt_cum,
##                               ###exp_SF_price*sword_wt_cum,
##                               ###days_past*sword_wt_cum,
##                               ###days_past^2*sword_wt_cum,
##                               ###ope_days_past*sword_wt_cum,
##                               ###ope_days_past^2*sword_wt_cum,
## 
##                               # BS cumulative number
##                               bshark_num_cum,
##                               exp_BS_price*bshark_num_cum,
##                               days_past*bshark_num_cum,
##                               days_past^2*bshark_num_cum,
##                               ope_days_past*bshark_num_cum,
##                               ope_days_past^2*bshark_num_cum,
## 
##                               # BS cumulative Weight
##                               ###bshark_wt_cum,
##                               ###exp_BS_price*bshark_wt_cum,
##                               ###days_past*bshark_wt_cum,
##                               ###days_past^2*bshark_wt_cum,
##                               ###ope_days_past*bshark_wt_cum,
##                               ###ope_days_past^2*bshark_wt_cum,
## 
##                               # Oil Prices
##                               oil_price,
##                               oil_price*exp_BS_price,
##                               oil_price*exp_SF_price,
##                               oil_price*days_past,
##                               oil_price*days_past^2,
##                               oil_price*ope_days_past,
##                               oil_price*ope_days_past^2
## 
##                               # Total cost
##                               ,cum_fuelcost,
##                               cum_rev,
##                               cum_profit
## 
## 
##                               ### dat3[,names_days],
## 
##                               ,dat3.boot[,names_days_SF_num],  # days*# of SF num s days ago
##                               dat3.boot[,names_days_BS_num] # days*# of BS wt s days ago
## 
##                               # square days
##                               ##,dat3[,names_days2_SF_num],
##                               ##dat3[,names_days2_BS_wt]
##                    ))
## 
##   # Convert into vector and matrix for lasso logit
##   y.boot = as.vector(dat.boot_LS$choice)
##   x.boot = as.matrix(dat.boot_LS[,-1])
##   x.naive.boot = as.matrix(dat.boot_LS[,-c(1,36:ncol(dat.boot_LS))])
##   # xdat = dat.temp_LS[,colSums(abs(dat.temp_LS)) !=0]
## 
##   # CV lasso logit: to find lambda_min
##   ## MSEs.boot = NULL
## 
##   cv.boot = cv.glmnet(x.boot,y.boot,
##                       family = "binomial",
##                       type.measure = "class",
##                       nfolds = 10,
##                       alpha = 1)
##   ## MSEs.boot <- cbind(MSEs.boot, cv.boot$cvm)
## 
##   cv.naive.boot = cv.glmnet(x.naive.boot,y.boot,
##                       family = "binomial",
##                       type.measure = "class",
##                       nfolds = 10,
##                       alpha = 1)
## 
## 
##   # Get lambda_min
##   lambda.min.boot <- cv.boot$lambda.min
##   lambda.min.naive.boot <- cv.naive.boot$lambda.min
## 
##   # Estimate Lasso Logit to get the object for prediction.
## 
##   lasso_logit.boot = glmnet(x.boot,y.boot, family = "binomial",
##                             alpha = 1)
##   lasso_logit.naive.boot = glmnet(x.naive.boot,y.boot, family = "binomial",
##                             alpha = 1)
## 
##   # ------ 2. Using the estimated lasso_logit, predict the CCP with the bootstrapped sample. ------
## 
##   array_integ.boot = array(0, dim = c(nrow(dat3.boot),10,10,7))
##   array_integ_naive.boot = array(0, dim = c(nrow(dat3.boot),10,10,7))
## 
## # loop to past_days
## for(l in 1:7){
##     # make all variables for integral
##     dat_temp.boot = dat3.boot
## 
##     dat_temp.boot$days_past = dat_temp.boot$days_past + l  # each day
##     dat_temp.boot$days_past2 = (dat_temp.boot$days_past)^2
##     dat_temp.boot$ope_day_past = dat_temp.boot$ope_days_past + l
##     # expected days since catch is: today + (expected days elapsed - days elapsed).
##     # the second term is the expected additional calendar day
##     exp_days_since_catch.15 = data.frame(rep(0,nrow(dat_temp.boot)),dat_temp.boot[,names_days]) + l
## 
##   # loop to make bshark_wt_disc10
##   for(j in 1:10){
## 
##     dat_temp.boot$bshark_num = (j-1)*133.555 + 133.555/2  # take the median of each bin.
## 
##     # loop to make sword_num_disc10 loop
## 
##     for(i in 1:10){
## 
##      dat_temp.boot2 = dat_temp.boot # renew df for new i loop
## 
##      dat_temp.boot2$sword_num = (i-1)*8.33 + 8.33/2  # take the median of each bin.
## 
##      # The data frame of expected interaction of past catch and past days
##      # 1 column delay: catch_1_ago is now catch_2_ago
##       exp_SF_num_past.15 = data.frame(dat_temp.boot2$sword_num,dat_temp.boot2[,names_SF_num])
##       ##exp_SF_wt_past.15 = data.frame(dat_temp.boot2$exp_SF_wt,dat_temp.boot2[,names_SF_wt])
##       exp_BS_num_past.15 = data.frame(dat_temp.boot2$bshark_num,dat_temp.boot2[,names_BS_num])
##       ## exp_BS_wt_past.15 = data.frame(dat_temp.boot2$bshark_wt,dat_temp.boot2[,names_BS_wt])
## 
## 
##     # make days_elapsed*catch_s_days_ago interaction that is expected.
##       exp_days_SF_num.15 = exp_SF_num_past.15*exp_days_since_catch.15
##       ##exp_days_SF_wt.15 = exp_SF_wt_past.15*exp_days_since_catch.15
##       exp_days_BS_num.15 = exp_BS_num_past.15*exp_days_since_catch.15
##       ##exp_days_BS_wt.15 = exp_BS_wt_past.15*exp_days_since_catch.15
##       exp_days2_SF_num.15 = exp_SF_num_past.15*exp_days_since_catch.15^2
##       ##exp_days2_SF_wt.15 = exp_SF_wt_past.15*exp_days_since_catch.15^2
##       exp_days2_BS_num.15 = exp_BS_num_past.15*exp_days_since_catch.15^2
##       ##exp_days2_BS_wt.15 = exp_BS_wt_past.15*exp_days_since_catch.15^2
## 
##     # change column name because days_catch_1 is now days_catch_2 in the one period later
##       colnames(exp_days_SF_num.15) = paste("days_SF_num",1:36,"ago",sep ="_")
##       ##colnames(exp_days_SF_wt.15) = paste("days_SF_wt",1:36,"ago",sep ="_")
##       colnames(exp_days_BS_num.15) = paste("days_BS_num",1:36,"ago",sep ="_")
##       ##colnames(exp_days_BS_wt.15) = paste("days_BS_wt",1:36,"ago",sep ="_")
##       colnames(exp_days2_SF_num.15) = paste("days2_SF_num",1:36,"ago",sep ="_")
##       ##colnames(exp_days2_SF_wt.15) = paste("days2_SF_wt",1:36,"ago",sep ="_")
##       colnames(exp_days2_BS_num.15) = paste("days2_BS_num",1:36,"ago",sep ="_")
##       ##colnames(exp_days2_BS_wt.15) = paste("days2_BS_wt",1:36,"ago",sep ="_")
## 
##       dat_temp.boot2 = dat_temp.boot2 %>%
##         mutate(sword_num_cum = sword_num_cum + sword_num,
##                ##sword_wt_cum = sword_wt_cum + exp_SF_wt/1000,
##                bshark_num_cum = bshark_num_cum + bshark_num,
##                ##bshark_wt_cum = bshark_wt_cum + bshark_wt/1000
##                )
## 
##     # replace past catch-days interaction
##       dat_temp.boot2[,names_days_SF_num] = exp_days_SF_num.15[,names_days_SF_num]
##       #dat_temp.boot2[,names_days_SF_wt] = exp_days_SF_wt.15[,names_days_SF_wt]
##       dat_temp.boot2[,names_days_BS_num] = exp_days_BS_num.15[,names_days_BS_num]
##       #dat_temp.boot2[,names_days_BS_wt] = exp_days_BS_wt.15[,names_days_BS_wt]
##       dat_temp.boot2[,names_days2_SF_num] = exp_days2_SF_num.15[,names_days2_SF_num]
##       #dat_temp.boot2[,names_days2_SF_wt] = exp_days2_SF_wt.15[,names_days2_SF_wt]
##       dat_temp.boot2[,names_days2_BS_num] = exp_days2_BS_num.15[,names_days2_BS_num]
##       # dat_temp.boot2[,names_days2_BS_wt] = exp_days2_BS_wt.15[,names_days2_BS_wt]
## 
##       # make x for glmnet
##   dat_temp.pred.boot = with(dat_temp.boot2,
##                    data.frame(exp_SF_price,
##                                       exp_BS_price,
##                                       days_past,
##                                       days_past^2,
##                                       ope_days_past,
##                                       ope_days_past^2,
##                                       sword_num,
##                                       ###sword_wt,
##                                       bshark_num,
##                                       ###bshark_wt,
##                                       sword_unit_wt_interpo,
##                                       bshark_unit_wt_interpo,
## 
##                                       # unit weight
##                                       ##sword_unit_wt_cum,
##                                       ##bshark_unit_wt_cum,
##                                       exp_SF_price*sword_unit_wt_interpo,
##                                       exp_BS_price*bshark_unit_wt_interpo,
##                                       ##exp_SF_price*sword_unit_wt_cum,
##                                       ##exp_BS_price*bshark_unit_wt_cum,
## 
##                                       # SF cumulative Number
##                                       sword_num_cum,
##                                       exp_SF_price*sword_num_cum,
##                                       days_past*sword_num_cum,
##                                       days_past^2*sword_num_cum,
##                                       ope_days_past*sword_num_cum,
##                                       ope_days_past^2*sword_num_cum,
## 
##                                       # SF cumulative Weight
##                                       ###sword_wt_cum,
##                                       ###exp_SF_price*sword_wt_cum,
##                                       ###days_past*sword_wt_cum,
##                                       ###days_past^2*sword_wt_cum,
##                                       ###ope_days_past*sword_wt_cum,
##                                       ###ope_days_past^2*sword_wt_cum,
## 
##                                       # BS cumulative number
##                                       bshark_num_cum,
##                                       exp_BS_price*bshark_num_cum,
##                                       days_past*bshark_num_cum,
##                                       days_past^2*bshark_num_cum,
##                                       ope_days_past*bshark_num_cum,
##                                       ope_days_past^2*bshark_num_cum,
## 
##                                       # BS cumulative Weight
##                                       ###bshark_wt_cum,
##                                       ###exp_BS_price*bshark_wt_cum,
##                                       ###days_past*bshark_wt_cum,
##                                       ###days_past^2*bshark_wt_cum,
##                                       ###ope_days_past*bshark_wt_cum,
##                                       ###ope_days_past^2*bshark_wt_cum,
## 
##                                       # Oil Prices
##                                       oil_price,
##                                       oil_price*exp_BS_price,
##                                       oil_price*exp_SF_price,
##                                       oil_price*days_past,
##                                       oil_price*days_past^2,
##                                       oil_price*ope_days_past,
##                                       oil_price*ope_days_past^2
## 
##                                       # Total cost
##                                       ,cum_fuelcost,
##                                       cum_rev,
##                                       cum_profit
## 
## 
##                                       ### dat3[,names_days],
## 
##                                       ,dat_temp.boot2[,names_days_SF_num],  # days*# of SF num s days ago
##                                       dat_temp.boot2[,names_days_BS_num] # days*# of BS wt s days ago
##                                       # days squares
##                                       ##,dat_temp2[,names_days2_SF_num],
##                                       ##dat_temp2[,names_days2_BS_wt]
##                    ))
## 
##   # make x for glmnet for naive prediction (no interaction of days and catch timing)
##   dat_temp.naive.pred.boot = with(dat_temp.boot2,
##                    data.frame(exp_SF_price,
##                                             exp_BS_price,
##                                             days_past,
##                                             days_past^2,
##                                             ope_days_past,
##                                             ope_days_past^2,
##                                             sword_num,
##                                             ###sword_wt,
##                                             bshark_num,
##                                             ###bshark_wt,
##                                             sword_unit_wt_interpo,
##                                             bshark_unit_wt_interpo,
## 
##                                             # unit weight
##                                             ##sword_unit_wt_cum,
##                                             ##bshark_unit_wt_cum,
##                                             exp_SF_price*sword_unit_wt_interpo,
##                                             exp_BS_price*bshark_unit_wt_interpo,
##                                             ##exp_SF_price*sword_unit_wt_cum,
##                                             ##exp_BS_price*bshark_unit_wt_cum,
## 
##                                             # SF cumulative Number
##                                             sword_num_cum,
##                                             exp_SF_price*sword_num_cum,
##                                             days_past*sword_num_cum,
##                                             days_past^2*sword_num_cum,
##                                             ope_days_past*sword_num_cum,
##                                             ope_days_past^2*sword_num_cum,
## 
##                                             # SF cumulative Weight
##                                             ###sword_wt_cum,
##                                             ###exp_SF_price*sword_wt_cum,
##                                             ###days_past*sword_wt_cum,
##                                             ###days_past^2*sword_wt_cum,
##                                             ###ope_days_past*sword_wt_cum,
##                                             ###ope_days_past^2*sword_wt_cum,
## 
##                                             # BS cumulative number
##                                             bshark_num_cum,
##                                             exp_BS_price*bshark_num_cum,
##                                             days_past*bshark_num_cum,
##                                             days_past^2*bshark_num_cum,
##                                             ope_days_past*bshark_num_cum,
##                                             ope_days_past^2*bshark_num_cum,
## 
##                                             # BS cumulative Weight
##                                             ###bshark_wt_cum,
##                                             ###exp_BS_price*bshark_wt_cum,
##                                             ###days_past*bshark_wt_cum,
##                                             ###days_past^2*bshark_wt_cum,
##                                             ###ope_days_past*bshark_wt_cum,
##                                             ###ope_days_past^2*bshark_wt_cum,
## 
##                                             # Oil Prices
##                                             oil_price,
##                                             oil_price*exp_BS_price,
##                                             oil_price*exp_SF_price,
##                                             oil_price*days_past,
##                                             oil_price*days_past^2,
##                                             oil_price*ope_days_past,
##                                             oil_price*ope_days_past^2
## 
## 
##                                             # Total cost
##                                             ,cum_fuelcost,
##                                             cum_rev,
##                                             cum_profit
## 
## 
##                                             ### dat3[,names_days],
## 
##                                             ###,dat3[,names_days_SF_num],  # days*# of SF num s days ago
##                                             ### dat3[,names_days_BS_wt] # days*# of BS wt s days ago
##                                  ))
## 
## 
##             # convert into matrix form because glmnet object accepts matrix
##             nx.temp.boot = as.matrix(dat_temp.pred.boot)
##             nx.naive.temp.boot = as.matrix(dat_temp.naive.pred.boot)
## 
##             dat_temp.boot2$pred_ccp_lasso = predict(lasso_logit.boot, newx = nx.temp.boot, s = lambda.min.boot, type = "response")
##             dat_temp.boot2$pred_ccp_lasso = ifelse(dat_temp.boot2$pred_ccp_lasso == 1, 0.999999,dat_temp.boot2$pred_ccp_lasso)
##             dat_temp.boot2$pred_ccp_lasso_naive = predict(lasso_logit.naive.boot, newx = nx.naive.temp.boot, s = lambda.min.naive.boot, type = "response")
##             dat_temp.boot2$pred_ccp_lasso_naive = ifelse(dat_temp.boot2$pred_ccp_lasso_naive == 1, 0.999999,dat_temp.boot2$pred_ccp_lasso_naive)
##             dat_temp.boot2$EV_lasso = (Euler - log(1 - dat_temp.boot2$pred_ccp_lasso))
##             dat_temp.boot2$EV_lasso_naive= (Euler - log(1 - dat_temp.boot2$pred_ccp_lasso_naive))
## 
## 
##      # EV_Lasso x prob corresponding to the category i of sword x prob corr j of bshark x prob day l
##             # for bootstrap, need to use index from dat_temp.15 to align for the matrices
##      array_integ.boot[,i,j,l] = as.numeric(dat_temp.boot2$EV_lasso)*mx_sword_dist_prob.15[dat_temp.15$trip_id %in% tripid_rdm,i]*mx_bshark_dist_prob.15[dat_temp.15$trip_id %in% tripid_rdm,j]*mx_days_dist_prob.15[dat_temp.15$trip_id %in% tripid_rdm,l]
##      array_integ_naive.boot[,i,j,l] = as.numeric(dat_temp.boot2$EV_lasso_naive)*mx_sword_dist_prob.15[dat_temp.15$trip_id %in% tripid_rdm,i]*mx_bshark_dist_prob.15[dat_temp.15$trip_id %in% tripid_rdm,j]*mx_days_dist_prob.15[dat_temp.15$trip_id %in% tripid_rdm,l]
## 
##     } # end of for i
## 
##   } # end of for j
## 
## } # end of for l
## 
## 
## 
## # ----- Integrate = sum over the possible states with associated prob.-----
## 
## dat3.boot$EV_lasso_integ = rowSums(array_integ.boot[,,,])
## dat3.boot$EV_lasso_naive_integ = rowSums(array_integ_naive.boot[,,,])
## 
## 
##   # ------ 3. Using the EV_lasso, estimate the structural parameter. -------
## 
##   # Model 7.0: cumulative profit
##   logit_2step_7.0.boot = glm(choice ~ 0 + cum_profit_interpo + sword_num_cum + bshark_num_cum +
##                                 offset(EV_lasso_naive_integ), data = dat3.boot, family="binomial")
##
## 
##   # Model 7.1: cumulative profit + second order
##   ##logit_2step_7.1.boot = glm(choice ~ cum_profit + sword_num_cum + decay_SF_num + bshark_num_cum +decay_BS_num + offset(EV_lasso), data = dat3.boot, family="binomial")
##   ## summary(logit_2step_7.1)
## 
##   # Model 7.2: cumulative profit + third order w/o first order
##   logit_2step_7.2.boot = glm(choice ~ 0 + cum_profit_interpo +
##                                sword_num_cum +
##                                decay_SF_num +
##                                bshark_num_cum +
##                                decay_BS_num +
##                                offset(EV_lasso_integ),
##                              data = dat3.boot, family="binomial")
##   ## summary(logit_2step_7.2.boot)
## 
##   # Model 7.3: cumulative profit + third order with first order
##   logit_2step_7.3.boot = glm(choice ~ 0 + cum_profit_interpo +
##                                sword_num_cum+
##                                decay_SF_num + decay2_SF_num +
##                                bshark_num +
##                                decay_BS_num + decay2_BS_num +
##                                offset(EV_lasso_integ),
##                              data = dat3.boot, family="binomial")
##   ## summary(logit_2step_7.3.boot)
## 
##   library(lmtest)
##   ##lrtest(logit_2step_7.3.boot,logit_2step_7.2.boot)
## 
##   # Save estimated coefficients
## 
##   boot_param_7.0[bb,] = coef(logit_2step_7.0.boot)
## 
##   ##boot_param_7.1[bb,] = coef(logit_2step_7.1.boot)
## 
##   boot_param_7.2[bb,] = coef(logit_2step_7.2.boot)
## 
##   boot_param_7.3[bb,] = coef(logit_2step_7.3.boot)
## 
##   print(paste0("end: ",bb,"th loop"))
##   print(paste0("time: ",(proc.time()["elapsed"]-bttime["elapsed"])/60," min"))
## 
## } # end loop bb
## 
## # ------- 4. Calculate bias corrected estimates and standard errors. -------
## 
## ##num = which(boot_param_7.0[,1] > 100)
## 
## ##write.csv(boot_param_7.0,"boot_param_7.0_011218.csv")
## ##write.csv(boot_param_7.1,"boot_param_7.1_011218.csv")
## datenum = format(Sys.Date(), "%d%m%y")
## 
## filename7.0 = paste0("boot_param_7.0_",datenum,".csv")
## filename7.2 = paste0("boot_param_7.2_",datenum,".csv")
## filename7.3 = paste0("boot_param_7.3_",datenum,".csv")
## 
## write.csv(boot_param_7.0,filename7.0)
## write.csv(boot_param_7.2,filename7.2)
## write.csv(boot_param_7.3,filename7.3)
## 
## 
## bttime_end = proc.time() - bttime # 30 hours on Lenov, 20 hours on macbook
## 
## print(bttime_end)
## 


## ----bootstrap_result_load, include = FALSE, eval = TRUE---------------
# instead of running "bootstrap" chunk, we load the result.
# because it takes looong time.
# If we want to run, set eval = TRUE for bootstrap chunk, and set eval = FALSE for this chunk
boot_param_7.0_ = read_csv("boot_param_7.0_010720.csv")%>%
                 dplyr::select(-X1) %>%
                 as.matrix()
boot_param_7.2_ = read_csv("boot_param_7.2_010720.csv")%>%
                 dplyr::select(-X1) %>%
                 as.matrix()
boot_param_7.3_ = read_csv("boot_param_7.3_010720.csv")%>%
                 dplyr::select(-X1) %>%
                 as.matrix()

B = nrow(boot_param_7.2_)
nb = length(unique(dat3.15$trip_id))


## ----bootstrap_result2, include = FALSE, eval = TRUE-------------------

# Here we use the result of bootstrap to show the actual result in the paper

# Bootstrap sample mean
boot_param_star_7.0 = colMeans(boot_param_7.0_,na.rm = TRUE)
##boot_param_star_7.1 = colMeans(boot_param_7.1_,na.rm = TRUE)
boot_param_star_7.2 = colMeans(boot_param_7.2_,na.rm = TRUE)
boot_param_star_7.3 = colMeans(boot_param_7.3_,na.rm = TRUE)



# Bootstrap corrected estimate: tilde = 2*hat - star = hat - (star - hat)
# tilde: bootstrap corrected, star: bootstrap mean, hat: original estimates, 
boot_param_tilde_7.0 = 2*coef(logit_2step_num.15.base) - boot_param_star_7.0
##boot_param_tilde_7.1 = 2*coef(logit_2step_7.1) - boot_param_star_7.1
boot_param_tilde_7.2 = 2*coef(logit_2step_num.15.7.2) - boot_param_star_7.2
boot_param_tilde_7.3 = 2*coef(logit_2step_num.15.7.3) - boot_param_star_7.3



# Bootstrap corrected variance and S.D. 

# Bootstrap corrected variance and SD for model 7.2
# note: apply(x,i,f): apply function (f) to matrix or df (x) by every row (i=1) or every column (i=2)
# In this case, apply substraction to boot_param by boot_param_star by every row. 
boot_param_var_7.2 = colSums(t(apply(boot_param_7.2_,1,'-',boot_param_star_7.2))^2,na.rm = TRUE)/B

boot_param_sd_7.2 = sqrt(boot_param_var_7.2)

# Bootstrap corrected variance and SD for model 7.3
boot_param_var_7.3 = colSums(t(apply(boot_param_7.3_,1,'-',boot_param_star_7.3))^2,na.rm = TRUE)/B

boot_param_sd_7.3 = sqrt(boot_param_var_7.3)

# p-values
pnorm(boot_param_tilde_7.2[3],mean = 0,sd = boot_param_sd_7.2[3])
pnorm(boot_param_tilde_7.2[4],mean = 0,sd = boot_param_sd_7.2[4])



# Plot the result
# Graph for freshness deterioration with bootstrap corrected params

# For model 7.2
theta_21.7.2.15.boot = as.numeric(boot_param_tilde_7.2["sword_num_cum"])
theta_22.7.2.15.boot = as.numeric(boot_param_tilde_7.2["decay_SF_num"]/2)
theta_23.7.2.15.boot = 0 # 2nd order, so 3rd order term is zero

theta_21_BS.7.2.15.boot = as.numeric(boot_param_tilde_7.2["bshark_num_cum"])
theta_22_BS.7.2.15.boot = as.numeric(boot_param_tilde_7.2["decay_BS_num"]/2)
theta_23_BS.7.2.15.boot = 0 # 2nd order, so 3rd order term is zero

theta_1.7.2.15.boot = as.numeric(boot_param_tilde_7.2["cum_profit_interpo"]) # negative, because the negative sign is expected by normaization -> 2021/2/1 changed to positive as the reviewer suggested to report the positive to consistent with the equation (10) (and it is more intuitive)


# For model 7.3
theta_21.7.3.15.boot = as.numeric(boot_param_tilde_7.3["sword_num_cum"])
theta_22.7.3.15.boot = as.numeric(boot_param_tilde_7.3["decay_SF_num"]/2)
theta_23.7.3.15.boot = as.numeric(boot_param_tilde_7.3["decay2_SF_num"]/3)

theta_21_BS.7.3.15.boot = as.numeric(boot_param_tilde_7.3["bshark_num_cum"])
theta_22_BS.7.3.15.boot = as.numeric(boot_param_tilde_7.3["decay_BS_num"]/2)
theta_23_BS.7.3.15.boot = as.numeric(boot_param_tilde_7.3["decay2_BS_num"]/3)



theta_1.7.3.15.boot = as.numeric(boot_param_tilde_7.3["cum_profit_interpo"]) # negative, because the negative sign is expected by normaization -> 2021/2/1 changed to positive as the reviewer suggested to report the positive to consistent with the equation (10) (and it is more intuitive)

# Deterioration function, raw unit
fresh_fun2.boot = function(x) (theta_21.7.2.15.boot*x + theta_22.7.2.15.boot*x^2)

fresh_fun3.boot = function(x) (theta_21.7.3.15.boot*x + theta_22.7.3.15.boot*x^2 + theta_23.7.3.15.boot*x^3)


# Deterioration function for Blue Shark, raw unit
fresh_fun2_BS.boot = function(x) (theta_21_BS.7.2.15.boot*x + theta_22_BS.7.2.15.boot*x^2)

fresh_fun3_BS.boot = function(x) (theta_21_BS.7.3.15.boot*x + theta_22_BS.7.3.15.boot*x^2 + theta_23_BS.7.3.15.boot*x^3)

# deterioration function, money value
fresh_money_fun2.boot = function(x) (theta_21.7.2.15.boot*x + theta_22.7.2.15.boot*x^2)/(-theta_1.7.2.15.boot)

# deterioration function, money value
# per weight. simply divided by 60, which is median of the unit weight of swordfish.
fresh_money_fun2_per_wt.boot = function(x) (theta_21.7.2.15.boot*x + theta_22.7.2.15.boot*x^2)/(60*-theta_1.7.2.15.boot)


# Blue Shark

# Money value
fresh_money_fun2_BS.boot = function(x) (theta_21_BS.7.2.15.boot*x + theta_22_BS.7.2.15.boot*x^2)/(-theta_1.7.2.15.boot)

##plot(fresh_money_fun2_BS.boot(1:40))

fresh_money_fun3_BS.boot = function(x) (theta_21_BS.7.3.15.boot*x + theta_22_BS.7.3.15.boot*x^2 + theta_23_BS.7.3.15.boot*x^3)/(-theta_1.7.3.15.boot)

##plot(fresh_money_fun3_BS.boot(1:40))


# make glm object with bootstrap result (for Table)
logit_2step_num.15.7.2.boot = logit_2step_num.15.7.2
logit_2step_num.15.7.3.boot = logit_2step_num.15.7.3



# replace the coefficient
logit_2step_num.15.7.2.boot$coefficients = boot_param_tilde_7.2
logit_2step_num.15.7.3.boot$coefficients = boot_param_tilde_7.3 ## temp

# adjustment of unit: parameter
logit_2step_num.15.7.2.boot$coefficients = logit_2step_num.15.7.2.boot$coefficients*1000

logit_2step_num.15.7.3.boot$coefficients = logit_2step_num.15.7.3.boot$coefficients*1000

# adjust sign: 
# as discussed above, Line2634, it is intuitive to show the positive sign for theta1.
logit_2step_num.15.7.2.boot$coefficients["cum_profit_interpo"] = -logit_2step_num.15.7.2.boot$coefficients["cum_profit_interpo"]

logit_2step_num.15.7.3.boot$coefficients["cum_profit_interpo"] = -logit_2step_num.15.7.3.boot$coefficients["cum_profit_interpo"]

# adjustment of unit: sd
boot_param_sd_7.2 = boot_param_sd_7.2*1000
boot_param_sd_7.3 = boot_param_sd_7.3*1000   # temp
## boot_param_sd_7.3 = sqrt(diag(vcov(logit_2step_num.15.7.3)))*1000     # temp

names(boot_param_sd_7.2) = names(logit_2step_num.15.7.2.boot$coefficients)
names(boot_param_sd_7.3) = names(logit_2step_num.15.7.3.boot$coefficients)




# Table output
tab5 <- stargazer(logit_2step_num.15.7.2.boot,logit_2step_num.15.7.3.boot,
          se = list(as.vector(boot_param_sd_7.2),
                    as.vector(boot_param_sd_7.3)),
          dep.var.labels = "Continuation decision",
          column.labels = c("Model 1","Model 2"),
          order = c("cum_profit_interpo","sword_num_cum","decay_SF_num","decay2_SF_num","bshark_num_cum","decay_BS_num","decay2_BS_num"),
          omit = c("^sword_num_cum$","^bshark_num_cum$"),
          covariate.labels = c("$\\theta_{1}$(Profit)","$\\theta_{21}^{SF}$","$2\\theta_{22}^{SF}$","$3\\theta_{23}$","$\\theta_{21}^{BS}$","$2\\theta_{22}^{BS}$","$3\\theta_{23}^{BS}$"),
          title = "\\label{tab:2nd}The result of structural parameters estimation",
          notes.align = "l",notes.append = FALSE,
          notes = "Bootstrapped standard errors in parentheses",
          header = FALSE,
          star.cutoffs = c(0.05, 0.01, 0.001))

note.tab5 <- "\\multicolumn{3}{l} {\\parbox[t]{10cm}{ \\textit{Notes:} Bootstrapped standard errors in parentheses. All parameters are scaled by 1000 as the raw values are small. *p<0.05;**p<0.01;***p<0.001}} \\\\"
tab5[grepl("Note",tab5)] <- note.tab5



## ----2nd_step_res_table, eval = TRUE,results='asis'--------------------

cat(tab5,sep = "\n")



## ---- back of envelop ---------------------


mean_sword_unit_wt = mean(dat3[dat3$sword_unit_wt > 0,]$sword_unit_wt)
mean_bshark_unit_wt = mean(dat3[dat3$bshark_unit_wt > 0,]$bshark_unit_wt)

mean_unit_price_sf = mean(dat3$exp_SF_price)
mean_price_per_sf =  mean_sword_unit_wt*mean_unit_price_sf

mean_unit_price_bs = mean(dat3$exp_BS_price)
# value per unit weight functions
# deterioration function, money value
# per weight.  The mean of the unit weight of swordfish.
fresh_money_fun2_per_wt.boot = function(x) (theta_21.7.2.15.boot*x + theta_22.7.2.15.boot*x^2)/(mean_sword_unit_wt*-theta_1.7.2.15.boot)



# According to the price per fish data, (Kesennuma market)
# high price per fish under 100kg could be around 150,000 JPY. Lower could 10,000. 

 # find the average "age" of fish per trip 
##colnames(dat3.15)

dat.temp = dat3.15 %>%
  filter(ope_num == ope_length) %>%
  mutate(ave_day_SF_num = decay_SF_num/sword_num_cum,
         ave_day_BS_num = decay_BS_num/bshark_num_cum)

##summary(dat.temp$ave_day_SF_num)


fresh10 = round(fresh_money_fun2.boot(10),digits = 0)
fresh20 = round(fresh_money_fun2.boot(20), digits = 0)
fresh30 = round(fresh_money_fun2.boot(30), digits = 0)
fresh_mean = round(fresh_money_fun2.boot(mean(dat.temp$ave_day_SF_num)), digits = 0)

# back of envelope

possible_high_price = round(mean_price_per_sf + (-fresh_mean), digits = 0)

fresh10_pwt = round(fresh_money_fun2_per_wt.boot(10),digits = 0)
fresh20_pwt = round(fresh_money_fun2_per_wt.boot(20), digits = 0)
fresh30_pwt = round(fresh_money_fun2_per_wt.boot(30), digits = 0)
fresh_mean_pwt = round(fresh_money_fun2_per_wt.boot(mean(dat.temp$ave_day_SF_num)), digits = 0)


possible_high_price_pwt = round(mean_price_per_sf/mean_sword_unit_wt + (-fresh_mean_pwt), digits = 0)






## ---- blue shark check ----------------------------
   
# per weight value function
fresh_money_fun2_BS_per_wt.boot = function(x) (theta_21_BS.7.2.15.boot*x + theta_22_BS.7.2.15.boot*x^2)/(mean_bshark_unit_wt*-theta_1.7.2.15.boot)

 # blue shark representative values

  freshBS10 = round(fresh_money_fun2_BS.boot(10), digits = 0)
  freshBS20 = round(fresh_money_fun2_BS.boot(20), digits = 0)
  freshBS30 = round(fresh_money_fun2_BS.boot(30), digits = 0)

  # third order
  freshBS3_30 = round(fresh_money_fun3_BS.boot(30), digits = 0) 
  
  # per weight
  freshBS10_pwt = round(fresh_money_fun2_BS_per_wt.boot(10),digits = 0)
  freshBS20_pwt = round(fresh_money_fun2_BS_per_wt.boot(20), digits = 0)
  freshBS30_pwt = round(fresh_money_fun2_BS_per_wt.boot(30), digits = 0)
  freshBS_mean_pwt = round(fresh_money_fun2_per_wt.boot(mean(dat.temp$ave_day_BS_num)), digits = 0)


## ---- eval = TRUE, fig.cap="\\label{fig:freshraw}Freshness deterioration function",fig.width = 6,fig.height=4----

plot_freshraw = ggplot(data.frame(x=c(0, 40)), aes(x)) + 
  stat_function(fun=fresh_fun2.boot, aes(lty = "2nd Order")) +
  stat_function(fun=fresh_fun3.boot, aes(lty = "3rd Order")) +
  labs(x = "Days since caught",
       y= "Freshness deterioration (per fish)",
       #title = "Freshness deterioration, second and third order",
       lty = "Specification") + 
  ##annotate(geom = "text", x = 28, y = fresh_fun3.boot(30)-0.1, label = "3rd Order") +
  ##annotate(geom = "text", x = 36, y = fresh_fun2.boot(30)+0.1, label = "2nd Order") +
  theme_bw() + 
  theme(text = element_text(family = "Times New Roman"))

plot_freshraw


## ----freshraw_pub, eval = FALSE----------------------------------------
## # plot for publications
## cairo_ps("figures/pub/freshraw.eps",width = 6, height = 4)
##   plot_freshraw
## dev.off()
## 


## ---- Freshness function in money value  ----

# Deterioration function, money value 
plot_fresh_val = ggplot(data.frame(x=c(0, 40)), aes(x)) + 
  stat_function(fun=fresh_money_fun2.boot,aes(lty = "Swordfish")) +
  stat_function(fun=fresh_money_fun2_BS.boot,aes(lty = "Blue Shark")) +
  annotate(geom = "text", x = 5, y = fresh10-800, label =  fresh10,color = "red") +
  annotate(geom = "text", x = 15, y = fresh20-800, label = fresh20,color = "red") +
  annotate(geom = "text", x = 25, y = fresh30-800, label = fresh30,color = "red") + 
  annotate(geom = "text", x = 30, y = -1300, label = freshBS30,color = "blue") +
  ##annotate(geom = "text", x = 37, y = -5500, label = "Swordfish", family = "Times New Roman") + 
  ##annotate(geom = "text", x = 36, y = -900, label = "Blue Shark",family = "Times New Roman") + 
  geom_point(x = 10, y = fresh10) +
  geom_point(x = 20, y = fresh20) + 
  geom_point(x = 30, y = fresh30) + 
  geom_point(x = 30, y = freshBS30) +
  labs(x = "Days since caught",
       y= "Freshness deterioration (JPY/fish)",
       linetype = "Species"
      # title = "Freshness value deterioration, second order"
       ) +
  theme_bw() + 
  theme(text = element_text(family = "Times New Roman"),
        legend.position = "bottom")



# Deterioration function, money value 
plot_fresh_val_pwt = ggplot(data.frame(x=c(0, 40)), aes(x)) + 
  stat_function(fun=fresh_money_fun2_per_wt.boot,aes(lty = "Swordfish")) +
  stat_function(fun=fresh_money_fun2_BS_per_wt.boot,aes(lty = "Blue Shark")) +
  annotate(geom = "text", x = 6, y = fresh10_pwt-25, label =  fresh10_pwt,color = "red") +
  annotate(geom = "text", x = 15, y = fresh20_pwt-30, label = fresh20_pwt,color = "red") +
  annotate(geom = "text", x = 25, y = fresh30_pwt-30, label = fresh30_pwt,color = "red") + 
  annotate(geom = "text", x = 30, y = freshBS30_pwt-20, label = freshBS30_pwt,color = "blue") +
 ## annotate(geom = "text", x = 25, y = -220, label = "Swordfish",family = "Times New Roman") + 
 ## annotate(geom = "text", x = 36, y = -20, label = "Blue Shark",family = "Times New Roman") + 
  geom_point(x = 10, y = fresh_money_fun2_per_wt.boot(10)) +
  geom_point(x = 20, y = fresh_money_fun2_per_wt.boot(20)) + 
  geom_point(x = 30, y = fresh_money_fun2_per_wt.boot(30)) + 
  geom_point(x = 30, y = fresh_money_fun2_BS_per_wt.boot(30)) +
  labs(x = "Days since caught",
       y= "Freshness deterioration (JPY/kg)",
       linetype = "Species"
      # title = "Freshness value deterioration, second order"
       ) +
  #guides(linetype = guide_legend(nrow = 2)) +
  theme_bw() + 
  theme(text = element_text(family = "Times New Roman"),
        legend.position = "bottom")


plot_fresh_val + plot_fresh_val_pwt + plot_annotation(tag_levels = 'A') + plot_layout(guides= "collect") & 
  theme(text = element_text(family = "Times New Roman"),
        legend.position = "bottom")



## ----------------------------------------------------------------------
cairo_ps("figures/pub/freshmoney.eps",width = 6, height = 4)
  plot_fresh_val + plot_fresh_val_pwt + plot_annotation(tag_levels = 'A') + plot_layout(guides= "collect") & 
  theme(text = element_text(family = "Times New Roman"),
        legend.position = "bottom")
dev.off()





